## Replication Code
## Accountability for the Local Economy at All Levels of Government in United States Elections
## Justin de Benedictis-Kessner Christopher Warshaw
## Run this code 3rd


# ------------------- #
#### Load packages ####
# ------------------- #
library(tidyverse)
library(ggmap) # for geocoding
library(sf)
library(foreign)
library(haven)
library(lfe)
library(tigris)
options(tigris_use_cache = T,tigris_class = "sf")

# -------------------------- #
#### preliminary business ####
# -------------------------- #

options(scipen=999,stringsAsFactors = F)

set.seed(6)

## Flag for whether we need to redact licensed elections data
include_licensed <- 0

# ----------------------------- #
#### Bring in elections data ####
# ----------------------------- #
### County elections from DBK & Warshaw 2018:
county_elecs <- read_csv("Election_Returns/county_elecs.csv")
# note: uncontested races are in here but may have demshare of -0.5 or 0.5
counties <- county_elecs %>%
	group_by(fips,county,state,year) %>%
	summarize(avg_demshare = mean(demshare,na.rm=T))
# note: using na.rm=T above makes it such that counties with some districts with NA values of demshare get included, but without those districts

councilcomp <- read_csv("Partisan_Control_and_Incumbency/councilcomp.csv")
councilcomp <- mutate(councilcomp,
											dem_win_control = as.numeric(seats_dem/seats_total>=0.5))
councilcomp <- councilcomp %>%
	group_by(fips) %>%
	mutate(dem_control = dplyr::lag(dem_win_control,order_by=year,n=1))
counties <- left_join(counties,councilcomp[,c("fips","year","dem_control")],by=c("fips" = "fips","year" = "year"))

## attach cities to their corresponding counties
# first downloading county shapefiles for the states where we have mayoral elections
counties_shp <- tigris::counties(cb = T,year = 2016) # all 3233 in the US
counties_shp <- counties_shp %>%
	select(GEOID, NAME) %>%
	rename(county_fips = GEOID,
				 county_name = NAME) %>%
	st_transform(102003)
st_crs(counties_shp) # checking that this is projectd (i.e. not just long/lat)

### City elections from dBK and Warshaw 2016:
read_mayors<-0
if(read_mayors==1){
mayors <- read_csv("Election_Returns/mayoral_election_check_all75k.csv")
mayors <- mayors %>%
	select(fips, govid_14, Name, STATEAB, elecdate, YearData, population_est,
				 mayor_name, mayor_votes_final, mayor_party_final,
				 runnerup_name, runnerup_votes_final, runnerup_party_final,
				 demshare) %>%
	rename(state = STATEAB,
				 year = YearData)
mayors$city <- gsub( " TOWNSHIP", "",mayors$Name)
mayors$city <- gsub( " TOWN", "",mayors$city)
mayors$city <- gsub( " CITY AND BOROUGH", "",mayors$city)
mayors$city <- gsub( " CITY AND COUNTY", "",mayors$city)
mayors$city <- gsub( " CITY-PARISH", "",mayors$city)
mayors$city <- gsub( " CITY", "",mayors$city)
mayors$city <- gsub( " VILLAGE", "",mayors$city)
mayors$city <- gsub( "ST ", "St. ",mayors$city)
mayors$city <- gsub( "FT ", "Fort ",mayors$city)
mayors$city <- gsub( "MT ", "Mount ",mayors$city)
mayors$city <- gsub( "WASHINGTON DC", "Washington",mayors$city)
mayors$city <- gsub( "WILKES BARRE", "WILKES-BARRE",mayors$city)
mayors$city <- gsub( "WINSTON SALEM", "WINSTON-SALEM",mayors$city)
mayors$city <- gsub( " MUNICIPALITY", "",mayors$city)
mayors$city <- tolower(mayors$city)

mayors$city <- gsub( "lexington-fayette urban co govt", "lexington",mayors$city)
mayors$city <- gsub( "lexington-fayette urban county", "lexington",mayors$city)
mayors$city <- gsub( "lexington-fayette urban county g", "lexington",mayors$city)
mayors$city <- gsub( "lexington-fayette urban county government",
										"lexington",mayors$city)
mayors <- mayors %>%
	select(-Name)
mayors <- subset(mayors, population_est>50000)

mayors_shp <- st_as_sf(mayors,coords = c("lon","lat")) %>%
	st_set_crs(value = 4269) %>%
	st_transform(102003)
# join county info to mayoral elections data:
mayors_shp <- st_join(mayors_shp,counties_shp,join=st_within,left=T)
mayors$county_fips <- mayors_shp$county_fips[match(mayors$fips,mayors_shp$fips)]
mayors$county_name <- mayors_shp$county_name[match(mayors$fips,mayors_shp$fips)]

mayors <- mayors %>%
	mutate(dem_win = as.numeric(demshare>=0.5))
mayors <- mayors %>%
	group_by(fips) %>%
	mutate(dem_control = dplyr::lag(dem_win,order_by=year,n=1))
 write.csv(mayors, "Election_Returns/mayors_elecs.csv",row.names = F)
}
mayors <- read_csv("Election_Returns/mayors_elecs.csv")


counties$year[which(counties$year==20006)] <- 2006
counties$year[which(counties$year==2996)] <- 2006
counties <- counties %>%
	rename(countyleg_avg_demshare = avg_demshare,
				 county_dem_control = dem_control
				 # county_dailypaper = dailypaper,
	) 

# election returns from previous merge scripts:
election_returns <- read.dta(file="elections_returns3.dta")
election_returns <- dplyr::rename(election_returns, county_fips=fips)

# ---------------------------- #
#### Bring in economic data ####
# ---------------------------- #
load(file="county_income.RData") # object named econ

# county ideology data from Tausanovitch and Warshaw
county_TW_ideology_estimates <- read.csv(file="Economy/county_TW_ideology_estimates.csv")
county_TW_ideology_estimates$state_fips_numeric <- as.numeric(as.vector(county_TW_ideology_estimates$state_fips))

county_TW_ideology_estimates2 <- dplyr::select(county_TW_ideology_estimates, state_fips_numeric, county_fips, abb)

election_returns <- merge(election_returns, county_TW_ideology_estimates2, by.y=c("abb", "county_fips"), by.x=c("state_abb", "county_fips" ), all.x=T, all.y=T)
election_returns$county_name <- gsub(" parish", "",election_returns$county_name)

## name change for Shannon County, SD
election_returns$county_fips[election_returns$county_fips==46113] <-46102
#election_returns$county_fips[election_returns$county_fips==55115]<- 55901

election_returns$county_fips <- as.character(election_returns$county_fips)

# election_returns$county_fips[election_returns$county_fips==46113 & !is.na(election_returns$county_fips)] <- 46102

election_returns$fips_bak <- election_returns$fips
# fix VA city/county problems:
VA_cities_counties_link <- read.csv(file="VA_cities_counties_link.csv")
election_returns <- merge(election_returns, VA_cities_counties_link, by.x="county_fips",by.y="fips", all.x=T)
election_returns$county_fips[!is.na(election_returns$fips_va)] <- election_returns$fips_va[!is.na(election_returns$fips_va)]

election_returns$county_fips <- as.numeric(as.vector(election_returns$county_fips))
econ$fips <- as.numeric(as.vector(econ$fips))

econ <- merge(econ, election_returns, by.x=c( "year", "fips"), 
by.y=c( "year", "county_fips"), all.x=T, all.y=T)

# keep limited set of vars:
econ_formerge <- econ %>%
	select(fips,  county_name, state_abb, population,
				 year,  
				 sen_dem_votes, sen_rep_votes, sen_total_votes,
				 pres_dem_votes, pres_rep_votes, pres_total_votes,
				 house_dem_votes, house_rep_votes, house_total_votes,
				 gov_dem_votes, gov_rep_votes, gov_total_votes, 
				 stateoffice_dem,
				 sld_dem_votes, sld_rep_votes,
				 pres_dem, sen_dem, gov_dem, house_dem, pres_dem_incumbent2,
				 earnings_perworker_real, wages_perworker_real, earnings_pc_real, qcew_pay_perworker_real, employment
				 )
				 
econ_formerge	<- econ_formerge %>%
	rename(
			stateoffice_demshare = stateoffice_dem
)

counties$fips <- as.numeric(counties$fips)
counties$fips <- as.character(counties$fips)
counties$year <- as.numeric(counties$year)

econ_formerge$fips<-as.character(econ_formerge$fips)
econ_formerge$year<-as.numeric(as.vector(econ_formerge$year))
econ_counties <- left_join(econ_formerge,
													 counties[,c("fips","year","countyleg_avg_demshare","county_dem_control")],
													 by=c("fips" = "fips","year" = "year"))

## Newspapers data:
papers_counties <- read_csv("Media/counties_papers_panel.csv")
papers_counties$fips <- as.character(as.numeric(papers_counties$county_fips))
papers_counties$years <- as.numeric(papers_counties$years)
econ_counties <- left_join(econ_counties,papers_counties[,c("fips","years","dailypaper")],
													 by=c("fips" = "fips","year" = "years")) %>%
	rename(county_dailypaper = dailypaper)


mayors <- mayors %>%
	rename(mayor_demshare = demshare,
				 mayor_dem_control = dem_control
				) %>%
	mutate(year = as.numeric(year))

# which cities to merge with each county of econ data when county has more than one city?
dupes = mayors %>%
	group_by(county_fips) %>%
	summarize(n_cities = length(unique(fips)))
length(unique(dupes$county_fips[dupes$n_cities>1])) # 67 counties with >1 city (out of 394 total)

# sort by population and choose largest population one:
econ_counties$city_fips <- NA
for(i in 1:length(unique(econ_counties$fips))){
	if(unique(econ_counties$fips)[i] %in% as.numeric(mayors$county_fips)){
		thiscounty <- subset(mayors, as.numeric(county_fips) == unique(econ_counties$fips)[i])
		if(length(unique(thiscounty$fips))>1){
			maxpop_fips <- unique(thiscounty$fips[which(thiscounty$population_est == max(thiscounty$population_est))])
			econ_counties$city_fips[which(econ_counties$fips == unique(econ_counties$fips)[i])] <- maxpop_fips
		} else{
			econ_counties$city_fips[which(econ_counties$fips == unique(econ_counties$fips)[i])] <- thiscounty$fips[1]
		}
	}
}
# 1672434 obs here

econ_counties_cities <- left_join(econ_counties,
																	mayors[,c("fips","year","mayor_demshare","mayor_dem_control","county_fips","county_name")],
																	by=c("city_fips" = "fips", "year" = "year"))
econ_counties_cities$fips <- sprintf("%05s",econ_counties_cities$fips) # to facilitate matching
# 1672524 obs now - added 90 obs for some reason?


# ------------------------------------- #
#### US House districts overlap data ####
# ------------------------------------- #
house_corr_19571958 <- read_csv("geo_overlaps/house_countycorr19571958.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19591960 <- read_csv("geo_overlaps/house_countycorr19591960.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19611962 <- read_csv("geo_overlaps/house_countycorr19611962.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19631964 <- read_csv("geo_overlaps/house_countycorr19631964.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19651966 <- read_csv("geo_overlaps/house_countycorr19651966.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19671968 <- read_csv("geo_overlaps/house_countycorr19671968.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19691970 <- read_csv("geo_overlaps/house_countycorr19691970.csv") %>%
	rename(house_to_county = cd_in_county, # % of house district in county, sums to 1 for every house_full
				 county_to_house = county_in_cd) # % of county in house district, sums to 1 for every county_fips

house_corr_19711972 <- read_csv("geo_overlaps/house_countycorr19711972.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19731974 <- read_csv("geo_overlaps/house_countycorr19731974.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19751976 <- read_csv("geo_overlaps/house_countycorr19751976.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19771978 <- read_csv("geo_overlaps/house_countycorr19771978.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19791980 <- read_csv("geo_overlaps/house_countycorr19791980.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19811982 <- read_csv("geo_overlaps/house_countycorr19811982.csv") %>%
	rename(house_to_county = cd_in_county,
				 county_to_house = county_in_cd)

house_corr_19901992 <- read_csv("geo_overlaps/house_countycorr19901992.csv") %>%
	rename(house_to_county = `cd102 to county allocation factor`,
				 county_to_house = `county to cd102 allocation factor`,
				 county_fips = county)
house_corr_19901992 <- house_corr_19901992 %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 state = substr(county_fips,1,2),
				 house_full = paste0(state,sprintf("%03s",cd102))
				 )

house_corr_19931998 <- read_csv("geo_overlaps/house_countycorr19921998.csv") %>%
	rename(house_to_county = `cd103 to county allocation factor`,
				 county_to_house = `county to cd103 allocation factor`,
				 county_fips = county) %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 state = substr(county_fips,1,2),
				 house_full = paste0(state,sprintf("%03s",`103rd Congressional District`))
	)

house_corr_19992000 <- read_csv("geo_overlaps/house_countycorr19992000.csv",skip = 1) %>%
	rename(house_to_county = `cd106 to county allocation factor`,
				 county_to_house = `county to cd106 allocation factor`,
				 state = `FIPS state`,
				 county_fips = county) %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 house_full = paste0(state,sprintf("%03s",`Cong District - 106th (1998)`))
	)

house_corr_20032004 <- read_csv("geo_overlaps/house_countycorr20032004.csv",skip = 1) %>%
	rename(house_to_county = `cd108 to county allocation factor`,
				 county_to_house = `county to cd108 allocation factor`,
				 state = `FIPS state`,
				 county_fips = county) %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 house_full = paste0(state,sprintf("%03s",`108th Congressional District (2002)`))
	)

house_corr_20052005 <- read_csv("geo_overlaps/house_countycorr20052006.csv",skip = 1) %>%
	rename(house_to_county = `cd109 to county allocation factor`,
				 county_to_house = `county to cd109 allocation factor`,
				 state = `FIPS state`,
				 county_fips = county) %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 house_full = paste0(state,sprintf("%03s",`Cong District - 109th (2004)`))
	)

house_corr_20092010 <- read_csv("geo_overlaps/house_countycorr20092010.csv",skip = 1) %>%
	rename(house_to_county = `cd111 to county allocation factor`,
				 county_to_house = `county to cd111 allocation factor`,
				 state = `State code`,
				 county_fips = `County code`) %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 house_full = paste0(state,sprintf("%03s",`111th Congressional district`))
	)

house_corr_20132014 <- read_csv("geo_overlaps/house_countycorr20132014.csv",skip = 1) %>%
	rename(house_to_county = `cd113 to county allocation factor`,
				 county_to_house = `county to cd113 allocation factor`,
				 state = `State code`,
				 county_fips = `County code`) %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 house_full = paste0(state,sprintf("%03s",`113th Congressional district`))
	)

house_corr_20152016 <- read_csv("geo_overlaps/house_countycorr20152016.csv",skip = 1) %>%
	rename(house_to_county = `cd114 to county allocation factor`,
				 county_to_house = `county to cd114 allocation factor`,
				 state = `State code`,
				 county_fips = `County code`) %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 house_full = paste0(state,sprintf("%03s",cd114))
	)

house_corr_20172018 <- read_csv("geo_overlaps/house_countycorr20172018.csv",skip = 1) %>%
	rename(house_to_county = `cd115 to county allocation factor`,
				 county_to_house = `county to cd115 allocation factor`,
				 county_fips = `County code`) %>%
	mutate(house_to_county = as.numeric(house_to_county),
				 county_to_house = as.numeric(county_to_house),
				 state = substr(county_fips,1,2),
				 house_full = paste0(state,sprintf("%03s",`115th Congressional district`))
	)

## bring in House elections data:
load("Election_Returns/house_elections_1932_2016.RData") # data2 object
house_elecs <- as_tibble(data); rm(data)
# bring in state FIPS codes:
fips_cw <- read_csv("StateCodes.csv")
house_elecs$state_fips <- sprintf("%02s",fips_cw$state_fips[match(house_elecs$state_abbrev,fips_cw$POAbrv)])

# create var for incumbency party
house_elecs <- house_elecs %>%
	mutate(house_full = paste0(state_fips,sprintf("%03s",district_code))) %>%
	group_by(house_full) %>%
	mutate(incumb_dem = lag(party_dem,order_by = election,n = 1))

## expand to create years between elections
house_elecs <- house_elecs %>%
	mutate(election = as.numeric(election)) %>%
	complete(election = seq.int(min(election), max(election),by = 1))

house_elecs <- house_elecs %>%
	arrange(election) %>%
	group_by(house_full) %>%
	fill(party_dem) %>% # fill between election years
	mutate(incumb_dem = lag(party_dem,order_by = election,n = 1))

summary(house_elecs$incumb_dem)
table(is.na(house_elecs$incumb_dem),house_elecs$election)

### add to county econ dataset by CD and year:

## Below code matches counties to US House districts
# This process takes awhile so first check if crosswalk file already exists:
if(file.exists("geo_overlaps/house_county_crosswalk.csv")){
	house_county_crosswalk <- read_csv("geo_overlaps/house_county_crosswalk.csv")
} else{
	# create new crosswalk data out of relevant vars:
	house_county_crosswalk <- econ_counties_cities %>%
		select(fips,year)
	
	# create empty vars inside crosswalk to store rescaled vote numbers:
	house_county_crosswalk$house_dem_incumb <- NA
	
	# match House districts to counties based on appropriate overlap shapes/population data:
	for(i in 1:nrow(house_county_crosswalk)){
		thisyear <- house_county_crosswalk$year[i]
		thisyear_house <- subset(house_elecs,election == thisyear)
		thiscounty <- house_county_crosswalk$fips[i]
		
		# use correct overlaps data based on different overlap files:
		thiscounty_house <- NULL
		if(thisyear>=2017){ 
			thiscounty_house <- subset(house_corr_20172018,county_fips == thiscounty)
		}
		
		if(thisyear<2017 & thisyear>=2015){
			thiscounty_house <- subset(house_corr_20152016,county_fips == thiscounty)
		}
		if(thisyear<2015 & thisyear>=2013){
			thiscounty_house <- subset(house_corr_20132014,county_fips == thiscounty)
		}
		if(thisyear<2013 & thisyear>=2009){
			thiscounty_house <- subset(house_corr_20092010,county_fips == thiscounty)
		}
		if(thisyear<2009 & thisyear>=2005){
			thiscounty_house <- subset(house_corr_20052005,county_fips == thiscounty)
		}
		if(thisyear<2005 & thisyear>=2003){
			thiscounty_house <- subset(house_corr_20032004,county_fips == thiscounty)
		}
		if(thisyear<2003 & thisyear>=1999){
			thiscounty_house <- subset(house_corr_19992000,county_fips == thiscounty)
		}
		if(thisyear<1999 & thisyear>=1993){
			thiscounty_house <- subset(house_corr_19931998,county_fips == thiscounty)
		}
		if(thisyear<1993 & thisyear>=1983){
			thiscounty_house <- subset(house_corr_19901992,county_fips == thiscounty)
		}
		if(thisyear<1983 & thisyear>=1981){
			thiscounty_house <- subset(house_corr_19811982,county_fips == thiscounty)
		}
		if(thisyear<1981 & thisyear>=1979){
			thiscounty_house <- subset(house_corr_19791980,county_fips == thiscounty)
		}
		if(thisyear<1979 & thisyear>=1977){
			thiscounty_house <- subset(house_corr_19771978,county_fips == thiscounty)
		}
		if(thisyear<1977 & thisyear>=1975){
			thiscounty_house <- subset(house_corr_19751976,county_fips == thiscounty)
		}
		if(thisyear<1975 & thisyear>=1973){
			thiscounty_house <- subset(house_corr_19731974,county_fips == thiscounty)
		}
		if(thisyear<1973 & thisyear>=1971){
			thiscounty_house <- subset(house_corr_19711972,county_fips == thiscounty)
		}
		if(thisyear<1971 & thisyear>=1969){
			thiscounty_house <- subset(house_corr_19691970,county_fips == thiscounty)
		}
		if(thisyear<1969 & thisyear>=1967){
			thiscounty_house <- subset(house_corr_19671968,county_fips == thiscounty)
		}
		if(thisyear>=1967){
			thiscounty_house$incumb_dem <- thisyear_house$incumb_dem[match(thiscounty_house$house_full,thisyear_house$house_full)]
			if(nrow(thiscounty_house)>0){
				# calculate percentage represented by incumbent dem:
				thiscounty_house$incumb_dem_scaled <- thiscounty_house$incumb_dem*thiscounty_house$county_to_house
				
				house_county_crosswalk$house_dem_incumb[i] <- sum(thiscounty_house$incumb_dem_scaled)
			}
		}
	}
	# output results
	write_csv(house_county_crosswalk,"geo_overlaps/house_county_crosswalk.csv")
}

# join crosswalk back into the main dataset:
econ_counties_cities <- left_join(econ_counties_cities,house_county_crosswalk,by=c("fips","year"))
# 1673169 obs now - added 645 obs?
summary(econ_counties_cities$house_dem_incumb) # mean = 0.46
table(is.na(econ_counties_cities$house_dem_votes),econ_counties_cities$year)
table(is.na(econ_counties_cities$house_dem_incumb),econ_counties_cities$year) # nothing before 1973


# ---------------------------------------------- #
#### state legislative districts overlap data ####
# ---------------------------------------------- #
# overlaps between county and SLDs from http://mcdc.missouri.edu/applications/geocorr2014.html 
upper_corr <- read_csv("geo_overlaps/upper_county_corr_2014.csv",skip = 1) %>%
	rename(sldu_to_county = `sldu14 to county14 alloc factor`,
				 county_to_sldu = `county14 to sldu14 alloc factor`,
				 state = `FIPS state`,
				 county_fips = county14) %>%
	mutate(sldu_to_county = as.numeric(sldu_to_county),
				 county_to_sldu = as.numeric(county_to_sldu),
				 sldu_full = paste0(state,sldu14))

lower_corr <- read_csv("geo_overlaps/lower_county_corr_2014.csv",skip=1) %>%
	rename(sldl_to_county = `sldl14 to county14 alloc factor`,
				 county_to_sldl = `county14 to sldl14 alloc factor`,
				 state = `FIPS state`,
				 county_fips = county14) %>%
	mutate(sldl_to_county = as.numeric(sldl_to_county),
				 county_to_sldl = as.numeric(county_to_sldl),
				 sldl_full = paste0(state,sldl14))
# checking:
# upper_corr %>%
# 	group_by(sldu_full) %>%
# 	summarize(sumfactor = sum(sldu_to_county)) %>%
# 	arrange(desc(sumfactor)) # max of 1
# upper_corr %>%
# 	group_by(county_fips) %>%
# 	summarize(sumfactor = sum(county_to_sldu)) %>%
# 	arrange(desc(sumfactor)) # max of 1.00
# 
# lower_corr %>%
# 	group_by(sldl_full) %>%
# 	summarize(sumfactor = sum(sldl_to_county)) %>%
# 	arrange(desc(sumfactor)) # max of 1
# lower_corr %>%
# 	group_by(county_fips) %>%
# 	summarize(sumfactor = sum(county_to_sldl)) %>%
# 	arrange(desc(sumfactor)) # max of 1.01, that's fine


## pre-2012 redistricting
# http://mcdc.missouri.edu/applications/geocorr2000.html apparently has wrong allocation factors, so doing by individual state and then compiling into one:
upper_corr02 <- read_csv("geo_overlaps/upper_county_corr_2002.csv",skip=1) %>%
	rename(sldu_to_county = `sldu02 to county allocation factor`,
				 county_to_sldu = `county to sldu02 allocation factor`,
				 county_fips = county) %>%
	filter(!is.na(`State Leg District Upper (Senate)- 2002`)) %>%
	mutate(sldu_to_county = as.numeric(sldu_to_county),
				 county_to_sldu = as.numeric(county_to_sldu),
				 state = str_sub(county_fips,1,2),
				 sldu_full = paste0(state,`State Leg District Upper (Senate)- 2002`))

lower_corr02 <- read_csv("geo_overlaps/lower_county_corr_2002.csv",skip=1) %>%
	rename(sldl_to_county = `sldl02 to county allocation factor`,
				 county_to_sldl = `county to sldl02 allocation factor`,
				 county_fips = county) %>%
	filter(!is.na(`State Leg District Lower (House)- 2002`)) %>%
	mutate(sldl_to_county = as.numeric(sldl_to_county),
				 county_to_sldl = as.numeric(county_to_sldl),
				 state = str_sub(county_fips,1,2),
				 sldl_full = paste0(state,`State Leg District Lower (House)- 2002`))


## pre-2002 redistricting, overlaps between counties and SLDs from NHGIS processed using file: sld_overlap_counties.R
upper_corr00 <- read_csv("geo_overlaps/upper_county_corr_2000.csv") %>%
	rename(state = STATEFP00) %>%
	filter(!is.na(SLDUST00)) %>%
	mutate(sldu_to_county = as.numeric(sldu_in_county),
				 sldu00 = sprintf("%03s",SLDUST00),
				 sldu_full = paste0(state,sldu00))

lower_corr00 <- read_csv("geo_overlaps/lower_county_corr_2000.csv") %>%
	rename(state = STATEFP00) %>%
	filter(!is.na(SLDLST00)) %>%
	mutate(sldl_to_county = as.numeric(sldl_in_county),
				 sldl00 = sprintf("%03s",SLDLST00),
				 sldl_full = paste0(state,sldl00))


## pre-1990, use overlaps of counties and SLDLs from ICPSR votes apportioned to counties
lower_corr68 <- read_csv("geo_overlaps/lower_county_corr_1968.csv") %>%
	filter(!is.na(sldl68)) %>%
	mutate(year = as.numeric(Election_Year),
				 sldl_to_county = as.numeric(sldl_to_county),
				 county_to_sldl = as.numeric(county_to_sldl)
	)


## bring in SL election results:
SLERs <- read_dta(file="Election_Returns/196slers1967to2016_20180908_trimmed.dta")
## fix some variables:
SLERs$post <- SLERs$geopost
SLERs$post[is.na(SLERs$post)] <- 0
table(SLERs$post)
SLERs$post2 <- SLERs$mmdpost
SLERs$post2[is.na(SLERs$post2)] <- 0
SLERs$post2[SLERs$post2==""] <- 0
table(SLERs$post2)

## fix an errant multimember post identifier in ND
SLERs$post2[SLERs$sab=="nd" & SLERs$mmdpost =='zzzab'] <- 0
SLERs$dno[is.na(SLERs$dno)] <- 0

table(SLERs$post2, SLERs$post)
SLERs$post_merged <- SLERs$post
SLERs$post_merged[SLERs$post_merged==0] <- SLERs$post2[SLERs$post_merged==0]

SLERs$post_merged[SLERs$year<1990 & SLERs$stateabrev=="ga" & SLERs$distz==39] <- 0

# SLERs <- subset(SLERs, sen==0)
dim(SLERs)
## subset to winners
#SLERs <- subset(SLERs, outcome=="w")

## create artificial vote number for winners with no vote number:
SLERs$vote[SLERs$outcome=="w" & is.na(SLERs$vote)] <- 1

### subset to SMD
#SLERs <- subset(SLERs, dtype==1)
### subset to general elections	
SLERs <- subset(SLERs, etype=="g")
### subset to democrats and republicans
SLERs <- subset(SLERs, partyt=="d" | partyt=="r")
#SLERs$party[SLERs$party=="republican"] <- "modern republican"
#SLERs$party[SLERs$party=="democraticparty"] <- "democrat"
table(SLERs$sab)
### subset to lower chamber
SLERs_l <- subset(SLERs, sen==0)
### subset to upper chamber
SLERs_u <- subset(SLERs, sen==1)
table(SLERs_u$sab)
### keep only districts with one seat
# SLERs_l <- subset(SLERs_l, dseats==1)
# SLERs_u <- subset(SLERs_u, dseats==1)
table(SLERs_l$sab)

## summarize over candidates, this addresses when there are multiple rows per candidate
# lower house:
SLERs_l <- SLERs_l %>% 
	group_by(year, sab, dname, dno, post_merged, candid) %>%
	summarize(
		vote = sum(vote, na.rm=T), 
		partyt = first(partyt),
		outcome = first(outcome),
		sicpsr = first(sicpsr),
		sfips = first(sfips),
		sen = first(sen),
		party = first(party),
		dseats = first(dseats),
		post = first(post),
		post2 = first(post2),
		etype = first(etype),
		dtype = first(dtype),
		cand = first(cand),
		regime = first(regime),
		# Decade = first(Decade),
		exper = first(exper)
	)

# upper house:
SLERs_u <- SLERs_u %>% 
	group_by(year, sab, dname, dno, post_merged, candid) %>%
	summarize(
		vote = sum(vote, na.rm=T), 
		partyt = first(partyt),
		outcome = first(outcome),
		sicpsr = first(sicpsr),
		sfips = first(sfips),
		sen = first(sen),
		party = first(party),
		dseats = first(dseats),
		post = first(post),
		post2 = first(post2),
		etype = first(etype),
		dtype = first(dtype),
		cand = first(cand),
		regime = first(regime),
		# Decade = first(Decade),
		exper = first(exper)
	)

## create demshare per district per year:
SLER_l_summ <- SLERs_l %>% 
	group_by(year, sab, sfips, dname, dno) %>%
	summarize(
		votesdem = sum(subset(vote, partyt=="d"), na.rm=T), # total dem votes across all seats in district
		votesrep = sum(subset(vote, partyt=="r"), na.rm=T),  # total rep votes across all seats in district
		# votes_incumb = sum(subset(vote,exper=="inc"),na.rm=T), # these lines don't work becasue SLERs data only codes if *candidate* was incumbent, not their party
		# party_incumb = first(subset(partyt,exper=="inc")), # take first
		# incumb_dem = as.numeric(first(subset(partyt,exper=="inc"))=="d") # take first
		winnerparty = first(subset(partyt,outcome=="w"))
	)
# check results
table(SLER_l_summ$winnerparty) # 58k dem, 46k rep

SLER_l_summ$sfips <- sprintf("%02i",SLER_l_summ$sfips)
SLER_l_summ$dno <- sprintf("%03i",SLER_l_summ$dno)
SLER_l_summ$dno[SLER_l_summ$dno=="000"] <- NA
SLER_l_summ$sldl_full <- paste0(SLER_l_summ$sfips,SLER_l_summ$dno)
SLER_l_summ$sldl_full[is.na(SLER_l_summ$dno)] <- NA
head(SLER_l_summ)

SLER_u_summ <- SLERs_u %>% 
	group_by(year, sab, sfips, dname, dno) %>%
	summarize(
		votesdem = sum(subset(vote, partyt=="d"), na.rm=T), # total dem votes across all seats in district
		votesrep = sum(subset(vote, partyt=="r"), na.rm=T),  # total rep votes across all seats in district
		# votes_incumb = sum(subset(vote,exper=="inc"),na.rm=T),
		# party_incumb = first(subset(partyt,exper=="inc")), # take first
		# incumb_dem = as.numeric(first(subset(partyt,exper=="inc"))=="d") # take first
		winnerparty = first(subset(partyt,outcome=="w"))
	)
# check results:
table(SLER_u_summ$winnerparty) # 16k dem, 13k rep

SLER_u_summ$sfips <- sprintf("%02i",SLER_u_summ$sfips)
SLER_u_summ$dno <- sprintf("%03i",SLER_u_summ$dno)
SLER_u_summ$dno[SLER_u_summ$dno=="000"] <- NA
SLER_u_summ$sldu_full <- paste0(SLER_u_summ$sfips,SLER_u_summ$dno)
SLER_u_summ$sldu_full[is.na(SLER_u_summ$dno)] <- NA
head(SLER_u_summ)

SLER_l_summ$demshare <- SLER_l_summ$votesdem/(SLER_l_summ$votesdem + SLER_l_summ$votesrep)
SLER_u_summ$demshare <- SLER_u_summ$votesdem/(SLER_u_summ$votesdem + SLER_u_summ$votesrep)
# SLER_l_summ$inc_share <- SLER_l_summ$votes_incumb/(SLER_l_summ$votesdem + SLER_l_summ$votesrep)
# SLER_u_summ$inc_share <- SLER_u_summ$votes_incumb/(SLER_u_summ$votesdem + SLER_u_summ$votesrep)
SLER_l_summ <- SLER_l_summ %>%
	group_by(sfips,dname,dno,sldl_full) %>%
	mutate(incumbent_party = lag(winnerparty,order_by = year,n = 1),
				 incumb_dem = as.numeric(incumbent_party=="d"))
SLER_u_summ <- SLER_u_summ %>%
	group_by(sfips,dname,dno,sldu_full) %>%
	mutate(incumbent_party = lag(winnerparty,order_by = year,n = 1),
				 incumb_dem = as.numeric(incumbent_party=="d"))
SLER_u_summ <- filter(SLER_u_summ,!is.na(sldu_full))

table(SLER_l_summ$incumbent_party) # 55k dem, 43k rep
table(SLER_u_summ$incumbent_party) # 15k dem, 12k rep

## Need to fill winner party in years between election years for calculating incumbency:
year_dist_mat <- data.frame(year = rep(1968:2018,each=length(unique(SLER_l_summ$sldl_full))),
														sldl_full = rep(unique(SLER_l_summ$sldl_full),times=length(1968:2018))) %>%
	as_tibble()
SLER_l_summ2 <- full_join(SLER_l_summ,year_dist_mat,by=c("sldl_full","year")) # creates extra rows between election years
# impute office-holder between years and recalculate incumbency vars:
SLER_l_summ2 <- SLER_l_summ2 %>%
	arrange(year) %>%
	group_by(sldl_full) %>%
	fill(winnerparty) %>%
	mutate(incumbent_party = lag(winnerparty,order_by = year,n = 1), # calculates incumbency from office-holder T-1
				 incumb_dem = as.numeric(incumbent_party=="d"))


year_dist_mat <- data.frame(year = rep(1968:2018,each=length(unique(SLER_u_summ$sldu_full))),
														sldu_full = rep(unique(SLER_u_summ$sldu_full),times=length(1968:2018))) %>%
	as_tibble()
SLER_u_summ2 <- full_join(SLER_u_summ,year_dist_mat,by=c("sldu_full","year")) # creates extra rows between election years
# impute office-holder between years and recalculate incumbency vars:
SLER_u_summ2 <- SLER_u_summ2 %>%
	arrange(year) %>%
	group_by(sldu_full) %>%
	fill(winnerparty) %>%
	mutate(incumbent_party = lag(winnerparty,order_by = year,n = 1), # calculates incumbency from office-holder T-1
				 incumb_dem = as.numeric(incumbent_party=="d"))

## checking:
# View(SLER_u_summ2 %>% filter(sldu_full=="06010") %>% arrange(year))

econ_counties_cities$fips <- sprintf("%05s",econ_counties_cities$fips) # to facilitate matching, should already be done in house section

# output list of counties
econ_counties_cities %>%
	select(fips,county_name.x,state_abb) %>%
	unique() %>%
	write_csv(path = "counties_list.csv")

# Below code matches counties to SLDs. This process takes awhile so first check if crosswalk file already exists:

if(file.exists("geo_overlaps/sld_county_crosswalk.csv")){
	sld_county_crosswalk <- read_csv("geo_overlaps/sld_county_crosswalk.csv")
} else{
	# create new CW data out of relevant vars:
	sld_county_crosswalk <- econ_counties_cities %>%
		select(fips,year)

	# create empty vars inside CW to store rescaled vote numbers:
	thiscounty_sldls <- NULL
	thiscounty_sldus <- NULL
	sld_county_crosswalk$sldl_dem_votes <- NA
	sld_county_crosswalk$sldl_rep_votes <- NA
	sld_county_crosswalk$sldu_dem_votes <- NA
	sld_county_crosswalk$sldu_rep_votes <- NA
	# sld_county_crosswalk$sldl_inc_votes <- NA
	# sld_county_crosswalk$sldu_inc_votes <- NA
	sld_county_crosswalk$sldl_dem_incumb <- NA
	sld_county_crosswalk$sldu_dem_incumb <- NA
	
	# match SLDs to counties based on appropriate overlap shapes/population data:
	for(i in 1:nrow(sld_county_crosswalk)){
		thisyear <- sld_county_crosswalk$year[i]
		thisyear_sldls <- subset(SLER_l_summ2,year == thisyear)
		thisyear_sldus <- subset(SLER_u_summ2,year == thisyear)
		
		thiscounty <- sld_county_crosswalk$fips[i]
		
		if(thisyear>=2012){ # post-2012 use new SLD overlaps based on 2010 census population
			thiscounty_sldls <- subset(lower_corr,county_fips == thiscounty)
			thiscounty_sldus <- subset(upper_corr,county_fips == thiscounty)
		}
		
		if(thisyear<2012 & thisyear>=2002){ # pre-2012 use old SLD overlaps based on 2000 census population
			thiscounty_sldls <- subset(lower_corr02,county_fips == thiscounty)
			thiscounty_sldus <- subset(upper_corr02,county_fips == thiscounty)
		}
		if(thisyear<2002 & thisyear>=1990){ # pre-2002 use old SLD overlaps based on 2000 census population
			thiscounty_sldls <- subset(lower_corr00,county_fips == thiscounty)
			thiscounty_sldus <- subset(upper_corr00,county_fips == thiscounty)
		}
		if(thisyear<1990 & thisyear>=1968){ # pre-2002 use old SLD overlaps based on 2000 census population
			thiscounty_sldls <- subset(lower_corr68,county_fips == thiscounty & (year == thisyear | year==(thisyear-1)))
			thiscounty_sldus <- NULL
		}
		if(thisyear<1968){
			thiscounty_sldls <- NULL
			thiscounty_sldus <- NULL
			
		}
		
		if(thisyear>=1968){ 
			
			if(thisyear>=1990){# should really be 1992, but don't have SLD boundaries for 1990-1992.
				thiscounty_sldls$demvotes <- thisyear_sldls$votesdem[match(thiscounty_sldls$sldl_full,thisyear_sldls$sldl_full)]
				thiscounty_sldls$repvotes <- thisyear_sldls$votesrep[match(thiscounty_sldls$sldl_full,thisyear_sldls$sldl_full)]
				# thiscounty_sldls$incvotes <- thisyear_sldls$votes_incumb[match(thiscounty_sldls$sldl_full,thisyear_sldls$sldl_full)]
				thiscounty_sldls$incumb_dem <- thisyear_sldls$incumb_dem[match(thiscounty_sldls$sldl_full,thisyear_sldls$sldl_full)]
				thiscounty_sldus$demvotes <- thisyear_sldus$votesdem[match(thiscounty_sldus$sldu_full,thisyear_sldus$sldu_full)]
				thiscounty_sldus$repvotes <- thisyear_sldus$votesrep[match(thiscounty_sldus$sldu_full,thisyear_sldus$sldu_full)]
				# thiscounty_sldus$incvotes <- thisyear_sldus$votes_incumb[match(thiscounty_sldus$sldu_full,thisyear_sldus$sldu_full)]
				thiscounty_sldus$incumb_dem <- thisyear_sldus$incumb_dem[match(thiscounty_sldus$sldu_full,thisyear_sldus$sldu_full)]
			}
			if(thisyear>=1968 & thisyear<1990){
				thiscounty_sldls$incumb_dem <- thisyear_sldls$incumb_dem[match(thiscounty_sldls$sldl_full,thisyear_sldls$sldl_full)]
				thiscounty_sldls$demvotes <- thisyear_sldls$votesdem[match(thiscounty_sldls$sldl_full,thisyear_sldls$sldl_full)]
				thiscounty_sldls$repvotes <- thisyear_sldls$votesrep[match(thiscounty_sldls$sldl_full,thisyear_sldls$sldl_full)]
			}
			
			if(nrow(thiscounty_sldls)>0){
				# scale votes by % of district in county:
				thiscounty_sldls$demvotes_scaled <- thiscounty_sldls$demvotes*thiscounty_sldls$sldl_to_county
				thiscounty_sldls$repvotes_scaled <- thiscounty_sldls$repvotes*thiscounty_sldls$sldl_to_county
				# thiscounty_sldls$incvotes_scaled <- thiscounty_sldls$incvotes*thiscounty_sldls$sldl_to_county
				# add scaled votes from all districts in county
				sld_county_crosswalk$sldl_dem_votes[i] <- sum(thiscounty_sldls$demvotes_scaled,na.rm=T)
				sld_county_crosswalk$sldl_rep_votes[i] <- sum(thiscounty_sldls$repvotes_scaled,na.rm=T)
				
				# calculate percentage represented by incumbent dem:
				thiscounty_sldls$incumb_dem_scaled <- thiscounty_sldls$incumb_dem*thiscounty_sldls$county_to_sldl
				
				# sld_county_crosswalk$sldl_inc_votes[i] <- sum(thiscounty_sldls$incvotes_scaled,na.rm=T)
				sld_county_crosswalk$sldl_dem_incumb[i] <- sum(thiscounty_sldls$incumb_dem_scaled)
			}
			if(length(thiscounty_sldus)>0 && nrow(thiscounty_sldus)>0){
				thiscounty_sldus$demvotes_scaled <- thiscounty_sldus$demvotes*thiscounty_sldus$sldu_to_county
				thiscounty_sldus$repvotes_scaled <- thiscounty_sldus$repvotes*thiscounty_sldus$sldu_to_county
				# thiscounty_sldus$incvotes_scaled <- thiscounty_sldus$incvotes*thiscounty_sldus$sldu_to_county
				
				thiscounty_sldus$incumb_dem_scaled <- thiscounty_sldus$incumb_dem*thiscounty_sldus$county_to_sldu
				
				sld_county_crosswalk$sldu_dem_votes[i] <- sum(thiscounty_sldus$demvotes_scaled,na.rm=T)
				sld_county_crosswalk$sldu_rep_votes[i] <- sum(thiscounty_sldus$repvotes_scaled,na.rm=T)
				# sld_county_crosswalk$sldu_inc_votes[i] <- sum(thiscounty_sldus$incvotes_scaled,na.rm=T)
				sld_county_crosswalk$sldu_dem_incumb[i] <- sum(thiscounty_sldus$incumb_dem_scaled)
			}
		}
	}
	write_csv(sld_county_crosswalk,"sld_county_crosswalk.csv")
}

table(is.na(sld_county_crosswalk$sldl_dem_votes),sld_county_crosswalk$year)
table(is.na(sld_county_crosswalk$sldl_dem_incumb),sld_county_crosswalk$year)

# join crosswalk back into the main dataset:
dim(econ_counties_cities) # 1673169
econ_counties_cities <- left_join(econ_counties_cities,sld_county_crosswalk,by=c("fips","year"))
dim(econ_counties_cities) # 1680585


# integrate pre-1990 SLD data from ICPSR with post-1990 SLERs data:
summary(econ_counties_cities$sldl_dem_votes)
summary(econ_counties_cities$sld_dem_votes)
table(econ_counties_cities$year[!is.na(econ_counties_cities$sldl_dem_votes)])

# ------------------------------ #
#### create main DVs and lags ####
# ------------------------------ #

# convert lots to numeric that were character:
econ_counties_cities$mayor_demshare <- as.numeric(econ_counties_cities$mayor_demshare) 
econ_counties_cities$countyleg_avg_demshare <- as.numeric(econ_counties_cities$countyleg_avg_demshare)+0.5
econ_counties_cities$house_dem_votes <- as.numeric(econ_counties_cities$house_dem_votes) 
econ_counties_cities$house_rep_votes <- as.numeric(econ_counties_cities$house_rep_votes) 
econ_counties_cities$pres_dem_votes <- as.numeric(econ_counties_cities$pres_dem_votes) 
econ_counties_cities$pres_rep_votes <- as.numeric(econ_counties_cities$pres_rep_votes) 

econ_counties_cities$wages_perworker_real <- as.numeric(econ_counties_cities$wages_perworker_real)
econ_counties_cities$qcew_pay_perworker_real <- as.numeric(econ_counties_cities$qcew_pay_perworker_real)


econ_counties_cities <- econ_counties_cities %>%
	mutate(sldl_demshare = sldl_dem_votes/(sldl_dem_votes + sldl_rep_votes),
				 # sldl_incshare = sldl_inc_votes/(sldl_dem_votes + sldl_rep_votes),
				 sldu_demshare = sldu_dem_votes/(sldu_dem_votes + sldu_rep_votes),
				 # sldu_incshare = sldu_inc_votes/(sldu_dem_votes + sldu_rep_votes),
				 gov_demshare = gov_dem_votes/(gov_dem_votes + gov_rep_votes),
				 sen_demshare = sen_dem_votes/(sen_dem_votes + sen_rep_votes),
				 house_demshare = house_dem_votes/(house_dem_votes + house_rep_votes),
				 pres_demshare = pres_dem_votes/(pres_dem_votes + pres_rep_votes),
				 
				 sldl_votes_total2p = (sldl_dem_votes + sldl_rep_votes),
				 sldu_votes_total2p = (sldu_dem_votes + sldu_rep_votes),
				 gov_votes_total2p = (gov_dem_votes + gov_rep_votes),
				 sen_votes_total2p = (sen_dem_votes + sen_rep_votes),
				 house_votes_total2p = (house_dem_votes + house_rep_votes),
				 pres_votes_total2p = (pres_dem_votes + pres_rep_votes)
				 
	) 

## create lag function for when years are missing in data:
lag.new <- function(x, n = 1L, along_with){
	index <- match(along_with - n, along_with, incomparable = NA)
	out <- x[index]
	attributes(out) <- attributes(x)
	out
}

lead.new <- function(x, n = 1L, along_with){
	index <- match(along_with + n, along_with, incomparable = NA)
	out <- x[index]
	attributes(out) <- attributes(x)
	out
}

econ_counties_cities <- econ_counties_cities %>%
	group_by(fips) %>%
	mutate(pres_demshare_lag = lag.new(pres_demshare,along_with=year,n=4),
				 sen_demshare_lag = lag.new(sen_demshare,along_with=year,n=6),
				 house_demshare_lag = lag.new(house_demshare,along_with=year,n=2),
				 gov_demshare_lag = lag.new(gov_demshare,along_with=year,n=2),
				 gov_demshare_lag4 = lag.new(gov_demshare,along_with=year,n=4),
				 stateoffice_demshare_lag = lag.new(stateoffice_demshare,along_with=year,n=4),
			#	 attorney_general_demshare_lag = lag.new(attorney_general_demshare,along_with=year,n=4),
			#	 secretary_state_demshare_lag = lag.new(secretary_state_demshare,along_with=year,n=4),
			#	 treasurer_demshare_lag = lag.new(treasurer_demshare,along_with=year,n=4),
				 sldl_demshare_lag = lag.new(sldl_demshare,along_with=year,n=2),
				 sldl_demshare_lag4 = lag.new(sldl_demshare,along_with=year,n=4),
				 # sldl_incshare_lag = lag.new(sldl_incshare,along_with=year,n=2),
				 sldu_demshare_lag = lag.new(sldu_demshare,along_with=year,n=2),
				 sldu_demshare_lag4 = lag.new(sldu_demshare,along_with=year,n=4),
				 countyleg_avg_demshare_lag = lag.new(countyleg_avg_demshare,along_with=year,n=2),
				 mayor_demshare_lag = lag.new(mayor_demshare,along_with=year,n=4),
				 
				 pres_demshare_laglag = lag.new(pres_demshare_lag,along_with=year,n=4),
				 sen_demshare_laglag = lag.new(sen_demshare_lag,along_with=year,n=6),
				 house_demshare_laglag = lag.new(house_demshare_lag,along_with=year,n=2),
				 gov_demshare_laglag = lag.new(gov_demshare_lag,along_with=year,n=2),
				 stateoffice_demshare_laglag = lag.new(stateoffice_demshare_lag,along_with=year,n=4),
				# attorney_general_demshare_laglag = lag.new(attorney_general_demshare_lag,along_with=year,n=4),
				# secretary_state_demshare_laglag = lag.new(secretary_state_demshare_lag,along_with=year,n=4),
				# treasurer_demshare_laglag = lag.new(treasurer_demshare_lag,along_with=year,n=4),
				 sldl_demshare_laglag = lag.new(sldl_demshare_lag,along_with=year,n=2),
				 sldu_demshare_laglag = lag.new(sldu_demshare_lag,along_with=year,n=2),
				 countyleg_avg_demshare_laglag = lag.new(countyleg_avg_demshare_lag,along_with=year,n=2),
				 mayor_demshare_laglag = lag.new(mayor_demshare_lag,along_with=year,n=4),
				 
				 wages_perworker_real_lag = dplyr::lag(wages_perworker_real,order_by=year,n=1),
				 qcew_pay_perworker_real_lag = dplyr::lag(qcew_pay_perworker_real,order_by=year,n=1),
				 employment_lag = dplyr::lag(employment,order_by=year,n=1))

# for Gov and SLDs use four-year lag if 2-year lag is NA - but this results in duplication unless using deltas
econ_counties_cities$gov_demshare_lag[is.na(econ_counties_cities$gov_demshare_lag)] <- econ_counties_cities$gov_demshare_lag4[is.na(econ_counties_cities$gov_demshare_lag)]
econ_counties_cities$sldl_demshare_lag[is.na(econ_counties_cities$sldl_demshare_lag)] <- econ_counties_cities$sldl_demshare_lag4[is.na(econ_counties_cities$sldl_demshare_lag)]
econ_counties_cities$sldu_demshare_lag[is.na(econ_counties_cities$sldu_demshare_lag)] <- econ_counties_cities$sldu_demshare_lag4[is.na(econ_counties_cities$sldu_demshare_lag)]

econ_counties_cities <- econ_counties_cities %>%
	mutate(pres_demshare_delta = (pres_demshare - pres_demshare_lag),
				 sen_demshare_delta = (sen_demshare - sen_demshare_lag),
				 house_demshare_delta = (house_demshare - house_demshare_lag),
				 gov_demshare_delta = (gov_demshare - gov_demshare_lag),
				 stateoffice_demshare_delta = (stateoffice_demshare - stateoffice_demshare_lag),
				 sldl_demshare_delta = (sldl_demshare - sldl_demshare_lag),
				 sldu_demshare_delta = (sldu_demshare - sldu_demshare_lag),
				 countyleg_avg_demshare_delta = (countyleg_avg_demshare - countyleg_avg_demshare_lag),
				 mayor_demshare_delta = (mayor_demshare - mayor_demshare_lag),
				 wages_perworker_real_delta = (log(wages_perworker_real) - log(wages_perworker_real_lag)),
				 wages_perworker_real_percdelta = ((wages_perworker_real) - (wages_perworker_real_lag))/wages_perworker_real_lag,
				 qcew_pay_perworker_real_delta = (qcew_pay_perworker_real - qcew_pay_perworker_real_lag),
				 employment_delta = (employment - employment_lag))

# summary(econ_counties_cities$mayor_demshare)
# summary(econ_counties_cities$countyleg_avg_demshare)
# summary(econ_counties_cities$sldl_demshare)
# summary(econ_counties_cities$sldu_demshare)
# summary(econ_counties_cities$gov_demshare)
# summary(econ_counties_cities$stateoffice_demshare)
# summary(econ_counties_cities$sen_demshare)
# summary(econ_counties_cities$house_demshare)
# summary(econ_counties_cities$pres_demshare)

## Add city lon/lat back in:
econ_counties_cities$city_lon <- mayors$lon[match(econ_counties_cities$city_fips,mayors$fips)]
econ_counties_cities$city_lat <- mayors$lat[match(econ_counties_cities$city_fips,mayors$fips)]


### Output datasets:
# write.dta(econ_counties_cities, file="econ_counties_cities.dta")
#save(econ_counties_cities,file = "econ_counties_cities.RData")


### Import county legislative elections and create leads and lags

### County elections from DBK & Warshaw 2018:
county_elecs <- read_csv("Election_Returns/county_elecs.csv")
# note: uncontested races are in here but may have demshare of -0.5 or 0.5
counties <- county_elecs %>%
	group_by(fips,county,state,year,district, n_winners) %>%
	summarize(avg_demshare = mean(demshare,na.rm=T))%>%
	group_by(fips,county,state,district) %>%
	mutate(
		countyleg_avg_demshare_delta1=100* (avg_demshare-lag.new(avg_demshare,along_with=year,n=1)),
		countyleg_avg_demshare_delta2=100* (avg_demshare-lag.new(avg_demshare,along_with=year,n=2)),
		countyleg_avg_demshare_delta3=100* (avg_demshare-lag.new(avg_demshare,along_with=year,n=3)),
		countyleg_avg_demshare_delta4=100* (avg_demshare-lag.new(avg_demshare,along_with=year,n=4))
		)
counties$countyleg_avg_demshare_delta_district<-counties$countyleg_avg_demshare_delta1
counties$countyleg_avg_demshare_delta_district[is.na(counties$countyleg_avg_demshare_delta_district)]<-counties$countyleg_avg_demshare_delta2[is.na(counties$countyleg_avg_demshare_delta_district)]
counties$countyleg_avg_demshare_delta_district[is.na(counties$countyleg_avg_demshare_delta_district)]<-counties$countyleg_avg_demshare_delta3[is.na(counties$countyleg_avg_demshare_delta_district)]
counties$countyleg_avg_demshare_delta_district[is.na(counties$countyleg_avg_demshare_delta_district)]<-counties$countyleg_avg_demshare_delta4[is.na(counties$countyleg_avg_demshare_delta_district)]

# note: using na.rm=T above makes it such that counties with some districts with NA values of demshare get included, but without those districts


### Import data on seat shares in county legislatures
# bring in data on council's seatshare:
councilcomp <- read_csv("Partisan_Control_and_Incumbency/councilcomp.csv")
councilcomp <- mutate(councilcomp,
											dem_win_control = as.numeric(seats_dem/seats_total>=0.5)) # is this right or already lagged?
councilcomp <- councilcomp %>%
	group_by(fips) %>%
	mutate(dem_control = dplyr::lag(dem_win_control,order_by=year,n=1))
counties <- left_join(counties,councilcomp[,c("fips","year","dem_control")],by=c("fips" = "fips","year" = "year"))


### Load the base data on party that holds senate seat based on previous election
senate_incumbents <- read.csv("Partisan_Control_and_Incumbency/senate_incumbents.csv")
senate_incumbents <- dplyr::select(senate_incumbents, POAbrv, election, senate_dem_incumb)
senate_incumbents$POAbrv <- as.vector(senate_incumbents$POAbrv)
econ_counties_cities <- merge(econ_counties_cities, senate_incumbents, by.x=c("state_abb","year"), by.y=c("POAbrv", "election"), all.x=T)


### Load data on partisan composition of federal government
federal_partisan_control <- read.csv("Partisan_Control_and_Incumbency/federal_partisan_control.csv")
econ_counties_cities <- merge(econ_counties_cities, federal_partisan_control, by="year")
dim(econ_counties_cities) # 1565787 - this took away rows

### Clean up the elections data a bit 
econ_counties_cities$population <- as.numeric(as.vector(econ_counties_cities$population))
econ_counties_cities$pres_demshare[(econ_counties_cities$pres_demshare==0 | econ_counties_cities$pres_demshare==1)&econ_counties_cities$state_abb=="MN"] <- NA
econ_counties_cities$sen_demshare[(econ_counties_cities$sen_demshare==0 | econ_counties_cities$sen_demshare==1)&econ_counties_cities$state_abb=="MN"] <- NA
econ_counties_cities$house_demshare[(econ_counties_cities$house_demshare==0 | econ_counties_cities$house_demshare==1)&econ_counties_cities$state_abb=="MN"] <- NA
econ_counties_cities$gov_demshare[(econ_counties_cities$gov_demshare==0 | econ_counties_cities$gov_demshare==1)&econ_counties_cities$state_abb=="MN"] <- NA
econ_counties_cities$sldl_demshare[(econ_counties_cities$sldl_demshare==0 | econ_counties_cities$sldl_demshare==1)&econ_counties_cities$state_abb=="MN"] <- NA
econ_counties_cities$sldu_demshare[(econ_counties_cities$sldu_demshare==0 | econ_counties_cities$sldu_demshare==1)&econ_counties_cities$state_abb=="MN"] <- NA
econ_counties_cities$mayor_demshare[(econ_counties_cities$mayor_demshare==0 | econ_counties_cities$mayor_demshare==1) &econ_counties_cities$state_abb=="MN" ] <- NA
econ_counties_cities$sldl_total_votes <- econ_counties_cities$sldl_dem_votes+econ_counties_cities$sldl_rep_votes
econ_counties_cities$sldu_total_votes <- econ_counties_cities$sldu_dem_votes+econ_counties_cities$sldu_rep_votes
econ_counties_cities$pres_total_votes <- econ_counties_cities$pres_dem_votes+econ_counties_cities$pres_rep_votes


### Calculate the average number of votes cast in each year. We'll use this later to create scaled measures of the number 
### of votes cast per office in each county.
econ_counties_years <- group_by(econ_counties_cities, year) %>%
	summarise(
	mean_population=mean(population, na.rm=T)
)

## Create a variable that summarizes election results at the state and local levels
econ_counties_local <- group_by(counties, year, fips) %>%
summarise(
		countyleg_avg_demshare_delta_alldistricts=mean(countyleg_avg_demshare_delta_district, na.rm=T)
	)
econ_counties_cities <- merge(econ_counties_cities,econ_counties_local,by=c("fips", "year"), all.x=T)


### Merge some stuff together
econ_counties_cities <- merge(econ_counties_cities, econ_counties_years, by="year", all.x=T)
econ_counties_cities <- subset(econ_counties_cities, fips!="000NA")

## create a measure of year 2000 population
econ_counties_cities_2010 <- subset(econ_counties_cities, year==2010)
econ_counties_cities_2010 <- dplyr::select(econ_counties_cities_2010,fips, population) %>%
	rename(population_2010=population)
econ_counties_cities <- merge(econ_counties_cities, econ_counties_cities_2010, by="fips", all.x=T)
dim(econ_counties_cities) # 197929 - this merge added 23410 rows

### Create measures we can use to weight regressions based on the number of votes cast in each county

econ_counties_cities$population_scaled <- econ_counties_cities$population/econ_counties_cities$mean_population

econ_counties_cities <- group_by(econ_counties_cities, fips) %>%
	mutate( 
	countyleg_avg_demshare_delta4 =100* (countyleg_avg_demshare-lag.new(countyleg_avg_demshare,along_with=year,n=4))
)
ungroup(econ_counties_cities)


## Make percentages 0-100 rather than 0-1
econ_counties_cities <- mutate(econ_counties_cities, 
	earnings_delta=log(earnings_perworker_real)-log(lag.new(earnings_perworker_real,along_with=year,n=1)),
	employment_delta=100*(log(employment)-log(employment_lag)),
	mayor_demshare=mayor_demshare*100,
	countyleg_avg_demshare=countyleg_avg_demshare*100,
	stateoffice_demshare=stateoffice_demshare*100,
	pres_demshare=pres_demshare*100,
	sen_demshare=sen_demshare*100,
	house_demshare=house_demshare*100,
	gov_demshare=gov_demshare*100,
	sldl_demshare=sldl_demshare*100,
	sldu_demshare=sldu_demshare*100,
	mayor_demshare_delta=mayor_demshare_delta*100,
	stateoffice_demshare_delta=stateoffice_demshare_delta*100,
	countyleg_avg_demshare_delta=countyleg_avg_demshare_delta*100,
	pres_demshare_delta=pres_demshare_delta*100,
	sen_demshare_delta=sen_demshare_delta*100,
	#attorney_general_demshare_delta=attorney_general_demshare_delta*100,
	#secretary_state_demshare_delta=secretary_state_demshare_delta*100,
	#treasurer_demshare_delta=treasurer_demshare_delta*100,
	house_demshare_delta=house_demshare_delta*100,
	gov_demshare_delta=gov_demshare_delta*100,
	sldl_demshare_delta=sldl_demshare_delta*100,
	sldu_demshare_delta=sldu_demshare_delta*100,
	mayor_demshare_lag=mayor_demshare_lag*100,
	countyleg_avg_demshare_lag=countyleg_avg_demshare_lag*100,
	pres_demshare_lag=pres_demshare_lag*100,
	sen_demshare_lag=sen_demshare_lag*100,
	house_demshare_lag=house_demshare_lag*100,
	gov_demshare_lag=gov_demshare_lag*100,
	sldl_demshare_lag=sldl_demshare_lag*100,
	sldu_demshare_lag=sldu_demshare_lag*100,
	pres_demshare_delta_delta=pres_demshare_lag-pres_demshare_laglag,
	house_demshare_delta_delta=house_demshare_lag-house_demshare_laglag,
	wages_perworker_real_delta=wages_perworker_real_delta*100,
	qcew_pay_perworker_real_delta = log(qcew_pay_perworker_real) - log(qcew_pay_perworker_real_lag),
	econ_delta = (employment_delta+wages_perworker_real_delta)/2,
)


## Create average vote shares and deltas by office

econ_counties_cities <- mutate(econ_counties_cities, 
	partisanship_federal=rowMeans(cbind(pres_demshare,sen_demshare,house_demshare), na.rm=T),
	partisanship_federal_lag=rowMeans(cbind(pres_demshare_lag,sen_demshare_lag,house_demshare_lag), na.rm=T),
	partisanship_federal_delta=rowMeans(cbind(pres_demshare_delta,sen_demshare_delta,house_demshare_delta), na.rm=T),
	partisanship_state=rowMeans(cbind(gov_demshare,sldl_demshare,stateoffice_demshare), na.rm=T),
	partisanship_state_lag=rowMeans(cbind(gov_demshare_lag,sldl_demshare_lag,stateoffice_demshare_lag), na.rm=T),
	partisanship_state_delta=rowMeans(cbind(gov_demshare_delta,sldl_demshare_delta,stateoffice_demshare_delta), na.rm=T),
	partisanship_state_delta2=rowMeans(cbind(gov_demshare_delta,sldl_demshare_delta,stateoffice_demshare_delta,countyleg_avg_demshare_delta_alldistricts), na.rm=T)
	)


## Create summary data to do descriptive analysis at the national level


econ_counties_years3 <- subset(econ_counties_cities,  !is.na(population)) %>%
group_by( year) %>%
	summarise(
	mean_pres_demshare_delta=weighted.mean(pres_demshare_delta, population,na.rm=T),
	mean_house_demshare_delta=weighted.mean(house_demshare_delta, population,na.rm=T),
	mean_wages_perworker_real_delta=weighted.mean(wages_perworker_real_delta, population,na.rm=T),
	mean_employment_delta=weighted.mean(employment_delta, population,na.rm=T),
	mean_econ_delta=weighted.mean(econ_delta, population,na.rm=T)
)
econ_counties_years3 <- merge(econ_counties_years3,federal_partisan_control, by="year")

econ_counties_years3$pres_dem_control <- NA
econ_counties_years3$pres_dem_control[econ_counties_years3$pres_party==0] <-  0
econ_counties_years3$pres_dem_control[econ_counties_years3$pres_party==1] <-  1

econ_counties_years3$pres_dem_control2 <- NA
econ_counties_years3$pres_dem_control2[econ_counties_years3$pres_party==0] <-  -1
econ_counties_years3$pres_dem_control2[econ_counties_years3$pres_party==1] <-  1
econ_counties_years3$mean_pres_demshare_delta2 <- econ_counties_years3$mean_pres_demshare_delta*econ_counties_years3$pres_dem_control2
econ_counties_years3$mean_house_demshare_delta2 <- econ_counties_years3$mean_house_demshare_delta*econ_counties_years3$pres_dem_control2


econ_counties_cities$pres_dem_control <- NA
econ_counties_cities$pres_dem_control[econ_counties_cities$pres_party==0] <-  0
econ_counties_cities$pres_dem_control[econ_counties_cities$pres_party==1] <-  1

## Create a version of variable for whether there is a democratic president that is -1/1
econ_counties_cities$pres_dem_control2 <- NA
econ_counties_cities$pres_dem_control2[econ_counties_cities$pres_dem_control==0] <-  -1
econ_counties_cities$pres_dem_control2[econ_counties_cities$pres_dem_control==1] <-  1

## Create a version of variable for the time period
econ_counties_cities$period <- NA
econ_counties_cities$period[econ_counties_cities$year<1993] <- 'period1'
econ_counties_cities$period[econ_counties_cities$year>1992] <- 'period2'
## allows us to make fixed effects for county x time period

## remove duplicate observations
econ_counties_cities$duplicate <- duplicated(paste(econ_counties_cities$fips, econ_counties_cities$year))
econ_counties_cities <- subset(econ_counties_cities, duplicate==F)
dim(econ_counties_cities) # down to 166030 rows


## Import and manipulate data on newspapers to do analysis of media effects
## First data source on newspapers
newspapers_counties_biggest_in_paper <- read.csv(file="Media/newspapers_counties_biggest_in_paper.csv")
newspapers_counties_biggest_in_paper$county_newspaper <- 1
newspapers_counties_biggest_in_paper <- dplyr::select(newspapers_counties_biggest_in_paper, county, county_newspaper)
econ_counties_cities$fips_numeric <- as.numeric(econ_counties_cities$fips)
econ_counties_cities <- merge(econ_counties_cities, newspapers_counties_biggest_in_paper, by.x=c("fips_numeric"), by.y="county", all.x=T)
dim(econ_counties_cities) # still 166030
econ_counties_cities$county_newspaper[is.na(econ_counties_cities$county_newspaper)] <- 0

## Second data source on newspapers
newspapers_panel <- read_csv(file="Media/counties_papers_panel.csv")
newspapers_panel <- dplyr::select(newspapers_panel, county_fips, years, dailypaper, weeklypaper)
econ_counties_cities <- merge(econ_counties_cities, newspapers_panel, by.x=c("fips", "year"), by.y=c("county_fips", "years"), all.x=T)
dim(econ_counties_cities) # 165980


## Import and merge data on partisan control of state governments
state_partisan_control <- read_csv(file="Partisan_Control_and_Incumbency/state_partisan_control.csv")
econ_counties_cities <- merge(econ_counties_cities, state_partisan_control, by.x=c("state_abb", "year"), by.y=c("abb", "year"), all.x=T)
dim(econ_counties_cities) # 165980
econ_counties_cities <- dplyr::rename(econ_counties_cities, gov_dem_control=gov_party)




#### Create lags and leads for appendix and robustness checks
econ_counties_cities <- group_by(econ_counties_cities, fips) %>%
	mutate( 
	wages_perworker_real_log=log(wages_perworker_real+1)*100,
	wages_perworker_real_delta_lag3 =lag.new(wages_perworker_real_delta,along_with=year,n=3),
	wages_perworker_real_delta_lag2 =lag.new(wages_perworker_real_delta,along_with=year,n=2),
	wages_perworker_real_delta_lag1 =lag.new(wages_perworker_real_delta,along_with=year,n=1),
	wages_perworker_real_lag3 =lag.new(wages_perworker_real_log,along_with=year,n=3),
	wages_perworker_real_lag2 =lag.new(wages_perworker_real_log,along_with=year,n=2),
	wages_perworker_real_lag1 =lag.new(wages_perworker_real_log,along_with=year,n=1),
	wages_perworker_real_delta_lead1 =lead.new(wages_perworker_real_delta,along_with=year,n=1),
	wages_perworker_real_delta_lead2 =lead.new(wages_perworker_real_delta,along_with=year,n=2),
	wages_perworker_real_delta_lead3 =lead.new(wages_perworker_real_delta,along_with=year,n=3),
	wages_perworker_real_delta_lead4 =lead.new(wages_perworker_real_delta,along_with=year,n=4),
	wages_perworker_real_delta_lead5 =lead.new(wages_perworker_real_delta,along_with=year,n=5),
	wages_perworker_real_lead1 =lead.new(wages_perworker_real_log,along_with=year,n=1),
	wages_perworker_real_lead2 =lead.new(wages_perworker_real_log,along_with=year,n=2),
	wages_perworker_real_lead3 =lead.new(wages_perworker_real_log,along_with=year,n=3),
	wages_perworker_real_lead4 =lead.new(wages_perworker_real_log,along_with=year,n=4),
	wages_perworker_real_lead5 =lead.new(wages_perworker_real_log,along_with=year,n=5),
	partisanship_federal_delta_lagged =lag.new(partisanship_federal_delta,along_with=year,n=2)
	#partisanship_federal_lagged =lag.new(partisanship_federal,along_with=year,n=2),
#	partisanship_state_lagged =lag.new(partisanship_state,along_with=year,n=2)

)


## remove duplicates
econ_counties_cities$duplicate <- duplicated(paste(econ_counties_cities$fips, econ_counties_cities$year))
econ_counties_cities <- subset(econ_counties_cities, duplicate==F)

election_types <- read_csv(file="Partisan_Control_and_Incumbency/election_types.csv")
econ_counties_cities <- merge(econ_counties_cities, election_types, by="year", all.x=T)
dim(econ_counties_cities) # 165980
econ_counties_cities$election_type2[econ_counties_cities$election_type=="presidential"] <- 'presidential'
econ_counties_cities$election_type2[econ_counties_cities$election_type!="presidential"] <- 'nonpresidential'

## create variable for midterm elections
econ_counties_cities$midterm[econ_counties_cities$election_type2=="nonpresidential"] <- 1
econ_counties_cities$midterm[econ_counties_cities$election_type2=="presidential"] <- 0

## change infinite values to NA
econ_counties_cities[mapply(is.infinite, econ_counties_cities)] <- NA

if (include_licensed==0){
	econ_counties_cities$gov_dem[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_dem[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_dem_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_demshare[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_demshare_delta[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_demshare_lag[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_demshare_lag4[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_demshare_laglag[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_rep_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_total_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_total_votes_scaled[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$gov_votes_total2p[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_dem[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_dem_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_demshare[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_demshare_delta[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_demshare_delta_delta[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_demshare_lag[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_demshare_laglag[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_rep_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_total_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_total_votes_scaled[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$house_votes_total2p[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_dem[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_dem_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_demshare[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_demshare_delta[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_demshare_delta_delta[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_demshare_lag[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_demshare_laglag[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_rep_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_total_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_total_votes_scaled[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$pres_votes_total2p[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_dem[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_dem_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_demshare[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_demshare_delta[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_demshare_lag[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_demshare_laglag[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_rep_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_total_votes[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_total_votes_scaled[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA
	econ_counties_cities$sen_votes_total2p[econ_counties_cities$year>1990 & econ_counties_cities$year<2016] <- NA

}

data_analysis <- econ_counties_cities
rm(econ_counties_cities)
gc()
dim(data_analysis) # 165980
counties_fips<-read.csv("counties_fips.csv")
counties_fips<-dplyr::select(counties_fips, -county_name)

data_analysis<-merge(data_analysis, counties_fips, by.x="fips_numeric", by.y="fips2", all.x=T)
data_analysis$state_abb<- as.vector(data_analysis$state_abb.y)
data_analysis$state_abb[is.na(data_analysis$state_abb)]<- data_analysis$state_abb.x[is.na(data_analysis$state_abb)]
data_analysis<-dplyr::select(data_analysis, -state_abb.x, -state_abb.y)

## Create a dummy variable for each state-year
data_analysis$state_year <- paste(data_analysis$state_abb, data_analysis$year, sep="")

save(data_analysis, file="econ_counties_cities_analysis.Rdata")
# write.dta(econ_counties_cities, file="econ_counties_cities_analysis.dta")
